/*******************************************************************************
* Objective: Generate IPUMS informality and industry-informality datasets
*******************************************************************************/

* Interpreter version: run everything below under Stata 16 rules
version 16

* Establish working directory *************************************************
* The path is read from the global macro workdirectory, which is expected to be
* defined before this do-file is run (it is not set anywhere in this section)
cd "$workdirectory"

* Session housekeeping *********************************************************
set more off
clear all
capture log close

********************************************************************************
**# Part 1: Generate informality dataset
********************************************************************************

/*******************************************************************************
Step 1: Importing and combining raw datasets
*******************************************************************************/

* Folder with the raw IPUMS class-of-worker tabulations.
* Forward slashes are used on purpose: they are valid on every platform and,
* unlike "\", are never read as an escape character in front of a macro.
local folder_i raw_datasets/IPUMS_labor/informality

* Importing into Stata
* For every region, read the CLASSWK and the CLASSWKD csv files, remove the
* percentage and total rows, and keep each result in its own tempfile named
* <region>_classwk_<gend> / <region>_classwkd_<gend>.

foreach region in lac africa asia fiji png{
	foreach gend in all{
		
		* Importing CLASSWK data * -------------------
		import delimited "`folder_i'/`region'_classwk_`gend'.csv", clear
		
		* Cleaning: drop the "Column percent" rows, the type_var column and
		* the "COL TOTAL" row
		drop if type_var == "Column percent"
		drop type_var
		drop if census == "COL TOTAL"
		
		* Prefix every CLASSWK variable with a_ so the names cannot clash with
		* the CLASSWKD variables at merge time; census (the merge key) keeps
		* its name
		foreach v of varlist _all{
			if "`v'" != "census" rename `v' a_`v'
		}
		
		tempfile `region'_classwk_`gend'
		save ``region'_classwk_`gend'', replace
		
		
		* Importing CLASSWKD data* -------------------
		import delimited "`folder_i'/`region'_classwkd_`gend'.csv", clear
		
		* Cleaning (same rows/column as for CLASSWK)
		drop if type_var == "Column percent"
		drop type_var
		drop if census == "COL TOTAL"	
		
		tempfile `region'_classwkd_`gend'
		save ``region'_classwkd_`gend'', replace
	}
}

clear all

* Appending regions for classwk and classwkd
* Start from the LAC file and stack the remaining regions below it, once per
* variable type; the stacked data go to the tempfile dataset_<vartype>_<gend>

foreach vartype in classwk classwkd{
	foreach gend in all{
		use `lac_`vartype'_`gend'', clear
		foreach region in africa asia fiji png{
			append using ``region'_`vartype'_`gend''
		}
		tempfile dataset_`vartype'_`gend'
		save `dataset_`vartype'_`gend'', replace
	}
}

* Merging classwk and classwkd
* One row per census on both sides; the merged file is written to disk
foreach gend in all{
	local merged_file "processed_datasets/dataset_ipums_informality_`gend'(09.22.2021).dta"
	use `dataset_classwk_`gend'', clear
	merge 1:1 census using `dataset_classwkd_`gend''
	save "`merged_file'", replace
}

/*******************************************************************************
Step 2: Cleaning data and generate shares
*******************************************************************************/

use "processed_datasets/dataset_ipums_informality_all(09.22.2021).dta", clear
	
*-------------------------------------------------------------------------------
* Cleaning classwkd variables
* Keep the census identifier, the CLASSWK counts (a_*) and the detailed
* CLASSWKD self-employment categories that are aggregated below

keep census a_niunotinuniverse a_selfemployed a_wagesalaryworker a_unpaidworker a_other a_unknownmissing a_rowtotal ///
	selfemployed employer sharecropperemployer workingonownaccount ownaccountagriculture ///
	domesticworkerselfemployed subsistenceworkerownconsumption ownaccountother memberofcooperative ///
	sharecropper sharecropperselfemployed sharecropperemployee ///
	ownaccountwithouttemporaryunpaid ownaccountwithtemporaryunpaidhel

*-------------------------------------------------------------------------------
* Creating new variables aggregating classwkd categories
* Note: the missing option of egen rowtotal() returns missing (rather than 0)
* when every component is missing for a census, so a census that reports none
* of the categories is not mistaken for a true zero.

* Self Employed (Unspecified)
rename selfemployed selfemployed_unspecified

* Self Employed - Employer
egen selfemployed_employer = rowtotal(employer sharecropperemployer), missing

* Self Employed - Own Account	
egen selfemployed_ownaccount = rowtotal(workingonownaccount ownaccountagriculture domesticworkerselfemployed ///
	subsistenceworkerownconsumption ownaccountother ownaccountwithouttemporaryunpaid ///
	ownaccountwithtemporaryunpaidhel), missing

* Self Employed - Cooperative
rename memberofcooperative selfemployed_cooperative

* Self Employed - Sharecropper
egen selfemployed_sharecropper = rowtotal(sharecropper sharecropperselfemployed sharecropperemployee), missing

*---------------------------------------------------------------------------	
* Cleaning again
* Only the identifier, the CLASSWK counts and the five aggregated
* self-employment categories are needed from here on

keep census a_wagesalaryworker a_unpaidworker a_unknownmissing a_selfemployed a_rowtotal a_other a_niunotinuniverse ///
	selfemployed_sharecropper selfemployed_ownaccount selfemployed_unspecified selfemployed_employer selfemployed_cooperative

*---------------------------------------------------------------------------
* Generating shares
* Notes: Shares will not add up to 1 if s_selfemployed and the s_selfemployed_* variables are used together, as the latter are the result of breaking down s_selfemployed into its different categories

* Every share uses the same denominator: the row total minus the
* not-in-universe count. The macro holds the expression text, so each gen
* below evaluates exactly (a_rowtotal-a_niunotinuniverse).
local universe (a_rowtotal-a_niunotinuniverse)

* CLASSWK categories
gen s_wage_worker = a_wagesalaryworker/`universe'
label variable s_wage_worker "Share Wage/salary worker"

gen s_unpaidworker = a_unpaidworker/`universe'
label variable s_unpaidworker "Share Unpaid worker"

gen s_unknownmissing = a_unknownmissing/`universe'
label variable s_unknownmissing "Share Unknown/missing"

gen s_selfemployed = a_selfemployed/`universe'
label variable s_selfemployed "Share Self-employed"

gen s_other = a_other/`universe'
label variable s_other "Share Other"

* Self-employment breakdown: share and label follow the same pattern for each
* aggregated category
foreach cat in sharecropper ownaccount unspecified employer cooperative{
	gen s_selfemployed_`cat' = selfemployed_`cat'/`universe'
	label variable s_selfemployed_`cat' "Share selfemployed_`cat'"
}

*-------------------------------------------------------------------------------
* Formatting data for compatibility with other datasets

* Generating country and year variable
* census is expected to read "<sample id> <country name> <year>" (this is the
* format of the census labels used elsewhere in this file, e.g.
* "598198001 Papua New Guinea 1980" -- TODO confirm for the csv inputs).
* The country is whatever remains after removing the leading numeric id and
* the trailing 4-digit year, so names of any length are kept whole (matching
* only two words would cut "Trinidad and Tobago" down to "Trinidad and").
gen country = ustrtrim(ustrregexra(census, "^[0-9]+[ ]+|[ ]+[0-9][0-9][0-9][0-9]$", ""))
gen year = regexs(0) if(regexm(census, "[0-9][0-9][0-9][0-9]$"))
drop census

destring year, replace

* Countries removed from the dataset
drop if country == "Armenia"
drop if country == "Palestine"
drop if country == "Saint Lucia"
drop if country == "Israel"
drop if country == "Kyrgyz Republic"
drop if country == "South Sudan"

* Country names harmonised with the naming used by the other datasets
replace country = "Venezuela, RB" if country == "Venezuela"
replace country = "Iran, Islamic Rep." if country == "Iran"
replace country = "Egypt, Arab Rep." if country == "Egypt"
replace country = "Lao PDR" if country == "Laos"
* Kept for safety: no longer triggered now that the full name is extracted
replace country = "Papua New Guinea" if country == "Papua New"
	
save "processed_datasets/dataset_ipums_informality_all(09.22.2021).dta", replace

********************************************************************************
**# Part 2: Generate informality by industry dataset
********************************************************************************

* Empty the session before building the industry-level dataset
clear all

/*******************************************************************************
Step 1: Importing and combining raw datasets
*******************************************************************************/

********************************************************************************
* Importing CLASSWK
********************************************************************************

* Importing LAC - *-1990 -------------------------------------------------------
* For every class-of-worker sheet of the pre1990 workbook: load the sheet,
* shorten the industry column names, prefix them with the sheet name and
* reshape to one row per census-industry. Result: tempfile lac_ck_a_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/lac_indgen_classwk_pre1990.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile lac_ck_a_`page'
	save `lac_ck_a_`page'', replace
}

* Importing LAC - 1991-* -------------------------------------------------------
* Same treatment as the other CLASSWK workbooks, applied to the post1991 file:
* load each sheet, shorten the industry names, prefix them with the sheet name
* and reshape to one row per census-industry. Result: tempfile lac_ck_b_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/lac_indgen_classwk_post1991.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile lac_ck_b_`page'
	save `lac_ck_b_`page'', replace
}

* Importing Africa - *-1990 ----------------------------------------------------
* For every class-of-worker sheet of the pre1990 workbook: load the sheet,
* shorten the industry column names, prefix them with the sheet name and
* reshape to one row per census-industry. Result: tempfile africa_ck_a_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/africa_indgen_classwk_pre1990.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile africa_ck_a_`page'
	save `africa_ck_a_`page'', replace
}

* Importing Africa 1991 -*  ----------------------------------------------------
* For every class-of-worker sheet of the post1991 workbook: load the sheet,
* shorten the industry column names, prefix them with the sheet name and
* reshape to one row per census-industry. Result: tempfile africa_ck_b_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/africa_indgen_classwk_post1991.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile africa_ck_b_`page'
	save `africa_ck_b_`page'', replace
}

* Importing Asia -*  -----------------------------------------------------------
* For every class-of-worker sheet of the Asia workbook: load the sheet,
* shorten the industry column names, prefix them with the sheet name and
* reshape to one row per census-industry. Result: tempfile asia_ck_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/asia_indgen_classwk.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Servicesnotspecified in this workbook -- presumably the
	* column does not exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile asia_ck_`page'
	save `asia_ck_`page'', replace
}

* Importing Fiji -*  -----------------------------------------------------------
* For every class-of-worker sheet of the Fiji workbook (no unknown sheet is
* read for Fiji): load the sheet, shorten the industry column names, prefix
* them with the sheet name and reshape to one row per census-industry.
* Result: tempfile fiji_ck_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other all_class no_data{
	import excel "raw_datasets/IPUMS_labor/informality/fiji_indgen_classwk.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Responsesuppressed or NoData in this workbook --
	* presumably those columns do not exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile fiji_ck_`page'
	save `fiji_ck_`page'', replace
}

* Importing PNG -*  ------------------------------------------------------------
* For every class-of-worker sheet of the Papua New Guinea workbook (no no_data
* sheet is read for PNG): load the sheet, shorten the industry column names,
* prefix them with the sheet name and reshape to one row per census-industry.
* Result: tempfile png_ck_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class{
	import excel "raw_datasets/IPUMS_labor/informality/png_indgen_classwk.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Servicesnotspecified, Otherindustrynec or
	* Responsesuppressed in this workbook -- presumably those columns do not
	* exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: the sheet name becomes the common
	* stub of every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' `page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long `page', i(census) j(industry, string)
	
	* Saving
	tempfile png_ck_`page'
	save `png_ck_`page'', replace
}

********************************************************************************
* Importing CLASSWKD - Only self-employment categories
********************************************************************************

* Importing LAC - *-1990 -------------------------------------------------------
* For every self-employment sheet of the pre1990 CLASSWKD workbook: load the
* sheet, shorten the industry column names, prefix them with se_<sheet> and
* reshape to one row per census-industry. Result: tempfile lac_ckd_a_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer scrop_employer own_ac own_ac_agri own_ac_other coop scrop scrop_self scrop_employee{
	import excel "raw_datasets/IPUMS_labor/informality/lac_indgen_classwkd_pre1990.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Servicesnotspecified in this workbook -- presumably the
	* column does not exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile lac_ckd_a_`page'
	save `lac_ckd_a_`page'', replace
}

* Importing LAC - 1991-* -------------------------------------------------------
* For every self-employment sheet of the post1991 CLASSWKD workbook: load the
* sheet, shorten the industry column names, prefix them with se_<sheet> and
* reshape to one row per census-industry. Result: tempfile lac_ckd_b_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer own_ac own_ac_dom own_ac_subs own_ac_other coop scrop_self scrop_employee{
	import excel "raw_datasets/IPUMS_labor/informality/lac_indgen_classwkd_post1991.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for NoData in this workbook -- presumably the column does not
	* exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile lac_ckd_b_`page'
	save `lac_ckd_b_`page'', replace
}

* Importing Africa * -----------------------------------------------------------
* For every self-employment sheet of the Africa CLASSWKD workbook: load the
* sheet, shorten the industry column names, prefix them with se_<sheet> and
* reshape to one row per census-industry. Result: tempfile africa_ckd_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer own_ac own_ac_agri own_ac_other coop{
	import excel "raw_datasets/IPUMS_labor/informality/africa_indgen_classwkd.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile africa_ckd_`page'
	save `africa_ckd_`page'', replace
}

* Importing Asia * -------------------------------------------------------------
* For every self-employment sheet of the Asia CLASSWKD workbook: load the
* sheet, shorten the industry column names, prefix them with se_<sheet> and
* reshape to one row per census-industry. Result: tempfile asia_ckd_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer own_ac own_ac_agri own_ac_ntemp own_ac_temp coop{
	import excel "raw_datasets/IPUMS_labor/informality/asia_indgen_classwkd.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Servicesnotspecified in this workbook -- presumably the
	* column does not exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile asia_ckd_`page'
	save `asia_ckd_`page'', replace
}

* Importing Fiji * -------------------------------------------------------------
* For every self-employment sheet of the Fiji CLASSWKD workbook: load the
* sheet, shorten the industry column names, prefix them with se_<sheet> and
* reshape to one row per census-industry. Result: tempfile fiji_ckd_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in employer own_ac own_ac_subs{
	import excel "raw_datasets/IPUMS_labor/informality/fiji_indgen_classwkd.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Responsesuppressed or NoData in this workbook --
	* presumably those columns do not exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile fiji_ckd_`page'
	save `fiji_ckd_`page'', replace
}

* Importing PNG * --------------------------------------------------------------
* For every self-employment sheet of the Papua New Guinea CLASSWKD workbook:
* load the sheet, shorten the industry column names, prefix them with
* se_<sheet> and reshape to one row per census-industry.
* Result: tempfile png_ckd_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer own_ac_agri own_ac_subs own_ac_other{
	import excel "raw_datasets/IPUMS_labor/informality/png_indgen_classwkd.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename.
	* No rename for Servicesnotspecified, Otherindustrynec or
	* Responsesuppressed in this workbook -- presumably those columns do not
	* exist here; TODO confirm.
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile png_ckd_`page'
	save `png_ckd_`page'', replace
}

* Importing Template * ---------------------------------------------------------
* This is just a template with all the different categories of self-employment;
* it is used later as the starting dataset when the regional files are appended.
* Each sheet gets the same treatment as the regional CLASSWKD workbooks.
* Result: tempfile template_ckd_<sheet>.
* The path uses forward slashes so that it also resolves outside Windows.

foreach page in self_emp employer scrop_employer own_ac own_ac_agri own_ac_dom own_ac_other own_ac_subs coop scrop scrop_self scrop_employee own_ac_ntemp own_ac_temp{
	import excel "raw_datasets/IPUMS_labor/informality/template_classwkd.xlsx", sheet("`page'") firstrow clear
	
	* Dropping type_var
	drop type_var
	
	* Renaming industries: each element is an "old new" pair passed to rename
	foreach pair in ///
		"NIUnotinuniverse niu" ///
		"Agriculturefishingandfor agri" ///
		"Miningandextraction mining" ///
		"Manufacturing mfg" ///
		"Electricitygaswaterandw utilities" ///
		"Construction construct" ///
		"Wholesaleandretailtrade trade" ///
		"Hotelsandrestaurants hospitality" ///
		"Transportationstorageand transport" ///
		"Financialservicesandinsura finance" ///
		"Publicadministrationandde govmt" ///
		"Servicesnotspecified unspec_service" ///
		"Businessservicesandreale buss_service" ///
		"Education educ" ///
		"Healthandsocialwork health" ///
		"Otherservices other_services" ///
		"Privatehouseholdservices household_serv" ///
		"Otherindustrynec other_industry" ///
		"Responsesuppressed supressed" ///
		"Unknown unknowns" ///
		"ROWTOTAL rowtotal" ///
		"NoData nodata" {
		rename `pair'
	}
	
	* Renaming variables for reshaping: se_<sheet> becomes the common stub of
	* every variable except the census identifier
	foreach v of varlist _all{
		if "`v'" != "census" rename `v' se_`page'`v'
	}
	
	* Reshaping: the part of each name after the stub goes to industry
	reshape long se_`page', i(census) j(industry, string)
	
	* Saving
	tempfile template_ckd_`page'
	save `template_ckd_`page'', replace
}

********************************************************************************
* Combining all the different datasets - Appending
********************************************************************************

* CLASSWK ----------------------------------------------------------------------
* For each sheet, start from the LAC pre-1990 file and stack every other
* regional file below it. Not every region was imported for every sheet
* (the Fiji loop reads no unknown sheet, the PNG loop no no_data sheet); in
* that case the tempfile macro is empty and the region is skipped explicitly.
* A failure on a file that does exist is still tolerated (the run goes on),
* but it is reported with its return code so it cannot pass unnoticed.

foreach page in NIU self_employed wage_worker unpaid_worker other unknown all_class no_data{
	
	use `lac_ck_a_`page'', clear
	
	foreach src in lac_ck_b africa_ck_a africa_ck_b asia_ck fiji_ck png_ck{
		* Path stored in the tempfile macro <src>_<page> (empty if never created)
		local piece ``src'_`page''
		if "`piece'" == "" {
			display as text "note: no `page' sheet was imported for `src'; nothing to append"
			continue
		}
		capture noisily append using "`piece'"
		if _rc {
			display as error "warning: could not append `src'_`page' (return code " _rc "); its rows are not in the result"
		}
	}
	
	drop if census == "COL TOTAL"
	tempfile indgen_classwk_`page'
	save `indgen_classwk_`page'', replace
}

* CLASSWKD ---------------------------------------------------------------------
* For each self-employment category, start from the template file and stack
* every regional file below it. Most regions only have some of the
* categories; when a category was never imported for a region its tempfile
* macro is empty and the region is skipped explicitly. A failure on a file
* that does exist is still tolerated (the run goes on), but it is reported
* with its return code so it cannot pass unnoticed.

foreach page in self_emp employer scrop_employer own_ac own_ac_agri own_ac_dom own_ac_other own_ac_subs coop scrop scrop_self scrop_employee own_ac_ntemp own_ac_temp{
	
	use `template_ckd_`page'', clear

	foreach src in lac_ckd_a lac_ckd_b africa_ckd asia_ckd fiji_ckd png_ckd{
		* Path stored in the tempfile macro <src>_<page> (empty if never created)
		local piece ``src'_`page''
		if "`piece'" == "" {
			display as text "note: no `page' sheet was imported for `src'; nothing to append"
			continue
		}
		capture noisily append using "`piece'"
		if _rc {
			display as error "warning: could not append `src'_`page' (return code " _rc "); its rows are not in the result"
		}
	}
			
	* Remove the column-total rows and the placeholder rows of the template
	drop if census == "COL TOTAL"
	drop if census == "template"
	tempfile indgen_classwkd_`page'
	save `indgen_classwkd_`page'', replace
}

********************************************************************************
* Combining all the different datasets - Merging
********************************************************************************

* Creating main dataset using CLASSWK first

* The NIU page is the base; every other CLASSWK page is merged onto it,
* matching observations on census and industry.
use `indgen_classwk_NIU', clear

foreach page in self_employed wage_worker unpaid_worker other unknown all_class no_data {
	* generate() stores the match indicator of each merge under its own name,
	* so the next merge does not stop on an already existing _merge variable
	merge 1:1 census industry using `indgen_classwk_`page'', generate(_merge_`page')
}

* The dataset is written to disk once, after the last merge, instead of being
* rewritten on every pass of the loop
save "processed_datasets/dataset_industry_ipums_informality(09.22.2021).dta", replace

* ADDING CLASSWKD

* Reload the merged CLASSWK dataset and add every CLASSWKD page to it,
* matching observations on census and industry.
use "processed_datasets/dataset_industry_ipums_informality(09.22.2021).dta", clear

foreach page in self_emp employer scrop_employer own_ac own_ac_agri own_ac_dom own_ac_other own_ac_subs coop scrop scrop_self scrop_employee own_ac_ntemp own_ac_temp {
	* generate() stores the match indicator of each merge under its own name,
	* so the next merge does not stop on an already existing _merge variable
	merge 1:1 census industry using `indgen_classwkd_`page'', generate(_merge_`page')
}

* The dataset is written to disk once, after the last merge, instead of being
* rewritten on every pass of the loop
save "processed_datasets/dataset_industry_ipums_informality(09.22.2021).dta", replace

********************************************************************************
* Generating Shares
********************************************************************************

use "processed_datasets/dataset_industry_ipums_informality(09.22.2021).dta", clear

* Cleaning
* Industry rows tagged niu, nodata or rowtotal are removed
foreach tag in niu nodata rowtotal {
	drop if industry == "`tag'"
}

* Rows with a class total of zero are removed, and so are rows in which the
* class total is equal to the no_data count
drop if all_class == 0
drop if all_class == no_data

* Papua New Guinea doesn't have nodata variable, so we replace missings with 0s
foreach png_census in "598198001 Papua New Guinea 1980" "598200001 Papua New Guinea 2000" {
	replace no_data = 0 if census == "`png_census'" & no_data == .
}

* Consistency with main informality dataset
rename (self_employed unpaid_worker) (selfemployed unpaidworker)

* ------------------------------------------------------------------------------
* Generating self-employed variables
*
* Each aggregate is the row sum of its detailed CLASSWKD components. The
* -missing- option of rowtotal() leaves the aggregate missing when every
* component is missing (without it the sum would be 0), so the all-missing
* rule no longer has to be written out by hand next to each variable list.

* Self-Employed (Unspecified)
rename se_self_emp selfemployed_unspecified

* Self-Employed: Employer
egen selfemployed_employer = rowtotal(se_employer se_scrop_employer), missing

* Self-Employed: Own Account
egen selfemployed_ownaccount = rowtotal(se_own_ac se_own_ac_agri se_own_ac_dom se_own_ac_other se_own_ac_subs se_own_ac_ntemp se_own_ac_temp), missing

* Self-Employed: Cooperative
rename se_coop selfemployed_cooperative

* Self-Employed: Sharecropper
egen selfemployed_sharecropper = rowtotal(se_scrop se_scrop_self se_scrop_employee), missing

* Generating Shares
* Every share is the category count divided by (all_class - no_data - NIU).
* The share is missing when the category or any term of the denominator is
* missing, or when the denominator is zero.
local share_vars selfemployed wage_worker unpaidworker other unknown selfemployed_unspecified selfemployed_employer selfemployed_ownaccount selfemployed_cooperative selfemployed_sharecropper

foreach share of local share_vars {
	generate s_`share' = (`share'/(all_class-no_data-NIU))
	la var s_`share' "Share of `share'"
}

* Generating country and year variable
* The census label is assumed to be "<numeric code> <country name> <4-digit year>",
* e.g. "598198001 Papua New Guinea 1980" (TODO confirm for every region).
* The country is everything between the leading code and the trailing year, so
* names of any length are kept whole. The earlier pattern kept at most the
* first two alphabetic words and truncated longer names (e.g. "Papua New").
gen country = regexs(1) if regexm(census, "^[0-9]*[ ]*(.*[^ 0-9])[ ]*[0-9][0-9][0-9][0-9]$")

* Fallback for labels that do not end in a 4-digit year: first one or two
* alphabetic words, as before
replace country = regexs(0) if country == "" & regexm(census, "(([a-zA-Z]+)[ ]*([a-zA-Z]+))")

* The year is the 4-digit number at the end of the label, converted to numeric
gen year = regexs(0) if(regexm(census, "[0-9][0-9][0-9][0-9]$"))

destring year, replace

* Making dataset compatible with main dataset
* Countries removed from this dataset
foreach excluded in "Armenia" "Palestine" "Saint Lucia" "Israel" "Kyrgyz Republic" "South Sudan" {
	drop if country == "`excluded'"
}

* Country names rewritten to the spelling used in the main dataset
replace country = "Venezuela, RB" if country == "Venezuela"
replace country = "Iran, Islamic Rep." if country == "Iran"
replace country = "Egypt, Arab Rep." if country == "Egypt"
replace country = "Lao PDR" if country == "Laos"
* Only reachable through the fallback pattern above; kept for safety
replace country = "Papua New Guinea" if country == "Papua New"

* Dropping _merge variables
drop _merge*

save "processed_datasets/dataset_industry_ipums_informality(09.22.2021).dta", replace
